Environment Setting
# Import required packages
library(tidyverse)
library(tmap)
library(ggplot2)
library(units)
library(sf)
library(leaflet)
library(tidycensus)
library(leafsync)
library(dbscan)
library(sfnetworks)
library(tigris)
library(tidygraph)
library(plotly)
wd <- file.path(Sys.getenv('setwd'),"work/working/School/UA_2022/external/Lab/module_2")
setwd(eval(wd))
What is Open Street Map
https://journal.r-project.org/archive/2013/RJ-2013-005/RJ-2013-005.pdf
https://r-spatial.org/r/2019/09/26/spatial-networks.html
https://cran.r-project.org/web/packages/dodgr/vignettes/dodgr.html
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
# Get bounding box coordinates for Atlanta
bb <- getbb('Atlanta, GA')
# Converting bb into an sf object
bb_sf <- bb %>% t %>% data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = 4326) %>%
st_bbox() %>%
st_as_sfc()
## Plot--
tmap_mode('view')
## tmap mode set to interactive viewing
tm_shape(bb_sf) + tm_borders()
OSM
wiki provides a detailed description on various ‘key-value’ pairs.
To download all possible key:value pairs, you can insert
available_tags("highway") instead of manually specifying a
list of values. One caveat is that, using all available tags will
generate a large data, significantly slowing down the processing
speed.
# Get OSM road data
osm_road <- opq(bbox = bb) %>%
add_osm_feature(key = 'highway',
value = c("motorway", "trunk", "primary",
"secondary", "tertiary", "unclassified",
"residential")) %>%
osmdata_sf() %>%
osm_poly2line()
names(osm_road)
## [1] "bbox" "overpass_call" "meta"
## [4] "osm_points" "osm_lines" "osm_polygons"
## [7] "osm_multilines" "osm_multipolygons"
## Plot--
tmap_mode('plot')
## tmap mode set to plotting
tm_shape(osm_road$osm_lines) + tm_lines(col = "highway")
# Breakdown of highway types
round( prop.table(table(osm_road$osm_lines$highway)) * 100, 1 )
##
## motorway motorway_link primary residential secondary
## 3.7 0.0 3.7 68.4 11.6
## tertiary trunk unclassified
## 8.6 1.5 2.5
Defining a custom bounding box
Displaying (and some computations) the entire network we just downloaded can slow down R environment. For class exercise, we will do the calculation for the whole network but limit the display to a smaller area. For that, we will create a custom bbox using coordinates of our selection. This technique of generating your own bounding box can also be useful if you have a specific area of interest that doesn’t overlap well with commonly used boundaries.
You need to define two points for bounding box, one point at the
lower left corner and the other at the upper right corner. You can go to
Google Maps, right-click on a point on map, and copy the XY coordinate.
Let’s store them in p1 and p2.
# p1 is lower left corner, p2 is the upper right corner
p1 <- c(33.746217847959734, -84.40851957882589)
p2 <- c(33.785889694219634, -84.36354430149285)
You will then need to format the two coordinates into the same format
as bb, the bounding box object we created above. Then,
let’s convert it into sf object.
# Custom BB
my_bb <- matrix(c(p1[2], p1[1],
p2[2], p2[1]), ncol = 2)
colnames(my_bb) <- c("min", "max")
rownames(my_bb) <- c("x", "y")
# Custom BB to sf
my_bb_sf <- my_bb %>% t %>% data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = 4326) %>%
st_bbox() %>%
st_as_sfc()
## Plot--
tmap_mode('view')
## tmap mode set to interactive viewing
tm_shape(bb_sf) + tm_borders(col = "black") + # Black = larger bbox
tm_shape(my_bb_sf) + tm_borders(col = "red") # Red = smaller bbox
Converting OSM data into a graph
# Extract a smaller network for exercise purpose
osm_small <- osm_road$osm_lines[my_bb_sf,]
# Converting the line component of OSM data into a graph (using a projected coordinate system for filtering below)
net <- sfnetworks::as_sfnetwork(osm_small, directed = FALSE)
print(net)
## # A sfnetwork with 1668 nodes and 1609 edges
## #
## # CRS: EPSG:4326
## #
## # An undirected multigraph with 246 components with spatially explicit edges
## #
## # Node Data: 1,668 × 1 (active)
## # Geometry type: POINT
## # Dimension: XY
## # Bounding box: xmin: -84.41749 ymin: 33.73862 xmax: -84.3525 ymax: 33.7971
## geometry
## <POINT [°]>
## 1 (-84.39738 33.76535)
## 2 (-84.39613 33.76534)
## 3 (-84.40845 33.77004)
## 4 (-84.40837 33.77255)
## 5 (-84.3786 33.7669)
## 6 (-84.37793 33.76562)
## # … with 1,662 more rows
## #
## # Edge Data: 1,609 × 239
## # Geometry type: LINESTRING
## # Dimension: XY
## # Bounding box: xmin: -84.41749 ymin: 33.73862 xmax: -84.3525 ymax: 33.7971
## from to osm_id name abando… abando… access access… addr.c… addr.c…
## <int> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 2 92346… Mill… <NA> <NA> <NA> <NA> <NA> <NA>
## 2 3 4 92346… Trav… <NA> <NA> <NA> <NA> <NA> <NA>
## 3 5 6 92354… Rena… <NA> <NA> priva… <NA> <NA> <NA>
## # … with 1,606 more rows, and 229 more variables: addr.housenumber <chr>,
## # addr.postcode <chr>, addr.state <chr>, addr.street <chr>, alt_name <chr>,
## # alt_name_1 <chr>, attribution <chr>, bicycle <chr>,
## # bicycle.lanes.backward <chr>, bicycle.lanes.forward <chr>, bridge <chr>,
## # bridge.name <chr>, bus <chr>, bus.lanes <chr>, center_turn_lane <chr>,
## # centre_turn_lane <chr>, change <chr>, change.backward <chr>,
## # change.forward <chr>, change.lanes <chr>, change.lanes.backward <chr>,
## # change.lanes.forward <chr>, check_date <chr>, check_date.surface <chr>,
## # class.bicycle <chr>, construction <chr>, covered <chr>, created_by <chr>,
## # cutting <chr>, cycleway <chr>, cycleway.both <chr>,
## # cycleway.est_width <chr>, cycleway.left <chr>, cycleway.right <chr>,
## # description <chr>, destination <chr>, destination.backward <chr>,
## # destination.forward <chr>, destination.lanes <chr>,
## # destination.lanes.forward <chr>, destination.ref <chr>,
## # destination.ref.backward <chr>, destination.ref.forward <chr>,
## # destination.ref.lanes <chr>, destination.ref.lanes.forward <chr>,
## # destination.ref.to <chr>, destination.ref.to.backward <chr>,
## # destination.ref.to.lanes <chr>, destination.symbol <chr>,
## # embedded_rails <chr>, expressway <chr>, fee <chr>, fixme <chr>,
## # FIXME <chr>, foot <chr>, ford <chr>, gatech.PRKG_NUM <chr>, goods <chr>,
## # HFCS <chr>, hgv <chr>, hgv.lanes <chr>, hgv.minaxles <chr>,
## # hgv.national_network <chr>, highway <chr>, highway_1 <chr>, horse <chr>,
## # hov <chr>, hov.lanes <chr>, incline <chr>, is_in.city <chr>,
## # junction <chr>, junction.ref <chr>, LandPro08.DE3 <chr>,
## # LandPro08.DE5 <chr>, lane_markings <chr>, lanes <chr>,
## # lanes.backward <chr>, lanes.both_ways <chr>, lanes.forward <chr>,
## # layer <chr>, level <chr>, lit <chr>, loc_name <chr>, maxheight <chr>,
## # maxheight.lane <chr>, maxlength <chr>, maxspeed <chr>,
## # maxspeed.advisory <chr>, maxspeed.type <chr>, maxspeed.variable <chr>,
## # maxweight <chr>, maxweight.signed <chr>, minspeed <chr>,
## # motor_vehicle <chr>, motor_vehicle.conditional <chr>,
## # motor_vehicle.lanes <chr>, motorcycle <chr>, motorcycle.lanes <chr>,
## # name.etymology.wikidata <chr>, name_ <chr>, …
## Plot--
leaflet() %>%
addProviderTiles(providers$CartoDB.DarkMatter) %>%
setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
addPolylines(data = net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>%
addCircles(data = net %>% st_as_sf('nodes'), fillColor = 'yellow',
stroke = F, radius = 20, fillOpacity = 0.7)
Cleaning network
Simplifying network
Multiple edges can connect the same pair of nodes, called
multiple edges. There can also be loops that starts and ends at
the same node (e.g., think of a cul-de-sac). The former case can be
detected using edge_is_multiple() and the latter using
edge_is_loop().
When removing a set of multiple edges using functions shown below, they always keep the first edge and discard others. By arranging the order of edges for each set of multiple edges, you can specify which edge you want to preserve.
This way of simplification means that, when the multiple edges within
a set have different attributes, all attribute information except for
the preserved one would be lost. In such cases, you can merge
those edges. Then, the output would have the geometry of the first edge
in the set, but the attributes would be some summary of all the edges in
the set. to_spatial_simple() function does this work.
# Let's simplify our network
simple_net <- net %>%
activate("edges") %>%
filter(!edge_is_multiple()) %>%
filter(!edge_is_loop())
# Because the difference is not really discernible, we just print out the differences in the edge count.
message(str_c("Before simplification, there were ", net %>% st_as_sf("edges") %>% nrow(), " edges. \n",
"After simplification, there are ", simple_net %>% st_as_sf("edges") %>% nrow(), " edges."))
## Before simplification, there were 1609 edges.
## After simplification, there are 1600 edges.
Subdivide edges
When as_sfnetwork() function converts an sf linestrings,
the nodes are defined as the endpoints of each linestring. If
you zoom into Midtown in the map above, you will see that there are many
intersections that do not have nodes, which are errors. We can use
to_spatial_subdivision predicate to create points at the
intersections and cut the edges accordingly.
To better understand
# Subdivision
subdiv_net <- convert(simple_net, sfnetworks::to_spatial_subdivision)
## Warning: to_spatial_subdivision assumes attributes are constant over geometries
# Add a custom index -> This will be used for visualization purposes later
subdiv_net <- subdiv_net %>%
activate("nodes") %>%
mutate(custom_id = seq(1, subdiv_net %>% st_as_sf("nodes") %>% nrow()),
is.new = case_when(is.na(.tidygraph_node_index) ~ "new nodes",
TRUE ~ "existing nodes"),
is.new = factor(is.new))
## Plot--
subdiv_pal <- colorFactor(palette = c("yellow", "red"), domain = subdiv_net %>% st_as_sf("nodes") %>% pull(is.new))
subdiv_map <- leaflet() %>%
addProviderTiles(providers$CartoDB.DarkMatter) %>%
setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
addPolylines(data = subdiv_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>%
addCircles(data = subdiv_net %>% st_as_sf('nodes'), fillColor = ~subdiv_pal(is.new), stroke = F, radius = 20, fillOpacity = 0.7) %>%
addLegend(position = "bottomright", pal = subdiv_pal, values = subdiv_net %>% st_as_sf("nodes") %>% pull(is.new))
subdiv_map
Delete pseudo nodes
smoothed_net <- convert(subdiv_net, sfnetworks::to_spatial_smooth)
# Extract removed points
removed <- setdiff(subdiv_net %>% st_as_sf('nodes') %>% pull(custom_id),
smoothed_net %>% st_as_sf('nodes') %>% pull(custom_id))
smooth_map <- leaflet() %>%
addProviderTiles(providers$CartoDB.DarkMatter) %>%
setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
addPolylines(data = smoothed_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>%
addCircles(data = smoothed_net %>% st_as_sf('nodes'), fillColor = 'yellow', stroke = F, radius = 20, fillOpacity = 0.7) %>%
addCircles(data = subdiv_net %>% st_as_sf("nodes") %>% filter(custom_id %in% removed),
fillColor = "red", stroke = F, radius = 15, fillOpacity = 0.8) %>%
addControl(html = htmltools::HTML("<b>Red points denote removed nodes</b>"), position = "bottomright")
smooth_map
Calculate centrality
# Calculate centrality measures
network_char <- smoothed_net %>%
activate("edges") %>%
mutate(weight = edge_length()) %>%
mutate(edge_bc = centrality_edge_betweenness(weights = weight, directed = F)) %>%
activate("nodes") %>%
mutate(node_bc = centrality_betweenness(weights = weight, directed = F))
## Warning in betweenness(graph = graph, v = V(graph), directed = directed, :
## 'nobigint' is deprecated since igraph 1.3 and will be removed in igraph 1.4
# Edge betweenness
bet_pal_edge <- colorNumeric(palette = "Reds", domain = network_char %>% activate("edges") %>% pull(edge_bc), n = 6)
# Node betweenness
bet_pal_node <- colorNumeric(palette = "Reds", domain = network_char %>% activate("nodes") %>% pull(node_bc), n = 6)
# Map
leaflet() %>%
addProviderTiles(providers$CartoDB.DarkMatter) %>%
setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
addPolylines(data = network_char %>% st_as_sf("edges"),
color = ~bet_pal_edge(network_char %>% st_as_sf('edges') %>% pull(edge_bc)), weight = 3, opacity = 0.7) %>%
addCircles(data = network_char %>% st_as_sf("nodes"),
fillColor = ~bet_pal_node(network_char %>% st_as_sf('nodes') %>% pull(node_bc)), stroke = F, fillOpacity = 0.7,
radius = network_char %>% st_as_sf("nodes") %>% with(.$node_bc/(max(.$node_bc)/100))) # denominator is selected to make the max value roughly equal to 100
# Try 2 more other centrality measures.
# Find good ways to calculate the radius argument in addCircles() function for visually pleasing maps for the 2 centrality measures.
Shortest path
# Change crs for convenience
smoothed_net <- smoothed_net %>% st_transform(4326)
# Start point
start_p <- st_point(c(-84.40364459476174,33.776160322717544)) %>% st_sfc(crs = 4326) # CRC at Georgia Tech
# End point
target_p <- st_point(c(-84.37639335217811, 33.75718076235044)) %>% st_sfc(crs = 4326) # MLK National Historical Park
# Get the shortest path
paths = st_network_paths(smoothed_net, from = start_p, to = target_p)
# Extract shortest path
paths_sf <- smoothed_net %>%
slice(paths$node_paths[[1]])
# Visualize
leaflet() %>%
addProviderTiles(providers$CartoDB.DarkMatter) %>%
setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
addPolylines(data = smoothed_net %>% st_as_sf("edges"), color = 'grey', weight = 2, opacity = 0.7) %>%
addPolylines(data = paths_sf %>% st_as_sf("edges"), color = "red", weight = 4, opacity = 0.8) %>%
addCircles(data = paths_sf %>% st_as_sf("nodes"), stroke = F, fillColor = "red", fillOpacity = 0.8, radius = 50)
## Assignment: sample one point from each tract, calculate average travel time from one census tract to all others. Repeat for all tracts.
## See if there is any patterns to the accessibility.
Extract intersections
end_points <- smoothed_net %>%
st_as_sf('nodes') %>%
st_join(smoothed_net %>% activate("edges") %>% st_as_sf())
intersections <- end_points %>%
group_by(.tidygraph_node_index) %>%
summarise(n = n()) %>%
filter(n > 1)
leaflet() %>%
addProviderTiles(providers$CartoDB.DarkMatter) %>%
setView( -84.3854, 33.7668, zoom = 14) %>% # zooming in to show more details
addPolylines(data = smoothed_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>%
addCircles(data = intersections %>% st_as_sf('nodes'), fillColor = 'red', stroke = F, radius = 20, fillOpacity = 0.7)
Merge it with Census data
Let’s recycle the code for downloading Census data through API from last week.
acs2020c <- acs2020 %>%
select(GEOID,
hhinc = hhincE,
r_tot = r_totE,
r_wh = r_whE,
r_bl = r_blE,
tot_hh = tot_hhE,
own_novhc = own_novhcE,
rent_novhc = rent_novhcE) %>%
mutate(pct_wh = r_wh / r_tot,
pct_bl = r_bl / r_tot,
pct_novhc = (own_novhc + rent_novhc)/tot_hh) %>%
mutate(area1 = unclass(st_area(.))) %>%
st_transform(26967) %>%
mutate(area2 = unclass(st_area(.))) %>%
st_transform(crs = 4326) %>%
mutate(ln_pop_den = log((r_tot / (area1/1000^2)) + 1)) %>%
filter(!is.na(hhinc), !is.na(r_tot), !is.na(own_novhc))
Spatial join the Census data with our network
# Before we do the join, let's use centrality measures that were calculated on the entire network.
# It takes time, so I calculated it and saved it.
# You can read the data back in
load("D:/Dropbox (GaTech)/Work/Working/School/UA_2022/external/Lab/module_2/network_char.RData")
census_centrality <- acs2020c %>%
st_join(network_char %>% st_as_sf("nodes") %>% st_transform(crs = 4326)) %>% # When I calculated centrality measures, the CRS was 26967, so reverting it back to 4326
group_by(GEOID) %>%
summarise(n = n(),
hhinc = mean(hhinc, na.rm = T),
pct_wh = mean(pct_wh, na.rm = T),
pct_bl = mean(pct_bl, na.rm = T),
pct_novhc = mean(pct_novhc, na.rm = T),
node_bc = mean(node_bc, na.rm = T))
tm_shape(acs2020c) + tm_polygons(col = "grey", alpha = 0.5) +
tm_shape(census_centrality) + tm_polygons(col = "node_bc", style = "quantile") +
tm_shape(bb_sf) + tm_borders()
census_centrality_plot <- census_centrality %>%
mutate(hhinc = log(hhinc),
pct_novhc = log(pct_novhc + 0.02)) %>%
pivot_longer(cols = c('hhinc', 'pct_wh', 'pct_bl', 'pct_novhc'), names_to = "variable", values_to = "value") %>%
mutate(variable = factor(variable, labels = c("Household Income (logged)", "% Black", "% No HH with no cars (logged)", "% White")))
centrality_plot <- census_centrality_plot %>%
ggplot() +
geom_point(aes(x = node_bc, y = value), alpha = 0.2) +
facet_wrap(~variable, scales = "free_y") +
labs(x = "Centrality", title = "Centrality VS. Socio-demographics") +
theme_bw()
centrality_plot + ggpubr::stat_cor(aes(x = node_bc, y = value))
## Warning: Removed 896 rows containing non-finite values (stat_cor).
## Warning: Removed 896 rows containing missing values (geom_point).
ggplotly(centrality_plot)